Goal 1: Determine how the different trimming parameters change the methylation stats notebook link for parameters
Goal 2: Determine if any methylation stats correlate with: DNA extraction date, Lib prep date, life stage or treatments
Goal 3: Generate methylation statistics
# Read in required libraries
library("reshape")
#library(plyr)
library("dplyr")
library("tidyverse")
library("Rmisc")
library(gridExtra)
library(ggpubr)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(lme4)
library(lmerTest)
library(car)
library(effects)
library(ggfortify)
library(cowplot)
library(vegan)
library(corrr)
library(ggcorrplot)
library(GGally)
library(broom)
library(cowplot)
library(arsenal)
library(patchwork)
library(tidyr)
library(ggrepel)
# load data
data <- read.csv("../data/WGBS/Thermal_Transplant_Methylseq_Stats.csv")
data2 <- data
# Removing characters in columns and turning the values numeric
data2$percent_Aligned <- as.numeric(sub("%","",data2$percent_Aligned))
data2$percent_Dups <- as.numeric(sub("%","",data2$percent_Dups))
data2$percent_Aligned <- as.numeric(sub("%","",data2$percent_Aligned))
data2$percent_mCHG <- as.numeric(sub("%","",data2$percent_mCHG))
data2$percent_mCHH <- as.numeric(sub("%","",data2$percent_mCHH))
data2$percent_mCpG <- as.numeric(sub("%","",data2$percent_mCpG))
data2$R1_percent_BP_Trimmed <- as.numeric(sub("%","",data2$R1_percent_BP_Trimmed))
data2$R1_percent_Dups <- as.numeric(sub("%","",data2$R1_percent_Dups))
data2$R1_percent_GC <- as.numeric(sub("%","",data2$R1_percent_GC))
data2$R2_percent_BP_Trimmed <- as.numeric(sub("%","",data2$R2_percent_BP_Trimmed))
data2$R2_percent_Dups <- as.numeric(sub("%","",data2$R2_percent_Dups))
data2$R2_percent_GC <- as.numeric(sub("%","",data2$R2_percent_GC))
data2$Mean_cov <- as.numeric(sub("X","",data2$Mean_cov))
data2$Median_cov <- as.numeric(sub("X","",data2$Median_cov))
#removing metadata columns
data3 <- data2[-c(1, 4:9)]
data4 <- melt(data3, id = c("Trimming", "Sample"))
data5 <- data4 %>%
pivot_wider(names_from = Trimming, values_from = value)
trim_corr_plot <- ggplot(data = data5, aes(x=Original, y=Trim_3))+
ylab("Trim_3")+ xlab("Original") +
geom_point()+
geom_smooth(method = "lm") +
# stat_regline_equation(label.y = 2.0, aes(label = ..eq.label..)) +
stat_regline_equation(label.y.npc = 'bottom', label.x.npc = 'center', aes(label = ..rr.label..)) +
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~ variable, scales = "free")
ggsave(filename="../output/WGBS/Trim_corr_plot.jpeg", plot=trim_corr_plot, dpi=300, width=12, height=12, units="in")
# Just using Trim_3 data
data6 <- data2 %>% filter(Trimming == "Trim_3")
data7 <- data6[-c(1:2, 19:26)]
data8 <- melt(data7, id = c(1:7))
# Turning variable columns into factors
col <- c("Sample", "Coral.ID", "History", "Life_Stage", "Gel_Result", "Extraction_Date", "PMS_Date")
data8[col] <- lapply(data8[col], factor)
data8A <- data8 %>% filter(Life_Stage == "Adult")
AHistory_Box <- ggplot(data8A, aes(x=History, y=value)) +
geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
geom_point(pch = 21) +
xlab("History") + ylab("Value") + #Axis titles
theme_bw() +
theme(panel.border = element_rect(color="black", fill=NA, size=0.75),
panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~ variable, scales = "free")
ggsave(filename="../output/WGBS/AHistory_Box.jpeg", plot=AHistory_Box, dpi=300, width=12, height=12, units="in")
data8L <- data8 %>% filter(Life_Stage == "Larvae")
LHistory_Box <- ggplot(data8L, aes(x=History, y=value)) +
geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
geom_point(pch = 21) +
xlab("History") + ylab("Value") + #Axis titles
theme_bw() +
theme(panel.border = element_rect(color="black", fill=NA, size=0.75),
panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~ variable, scales = "free")
ggsave(filename="../output/WGBS/LHistory_Box.jpeg", plot=LHistory_Box, dpi=300, width=12, height=12, units="in")
LifeStage_Box <- ggplot(data8, aes(x=Life_Stage, y=value)) +
geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
geom_point(pch = 21) +
xlab("Life Stage") + ylab("Value") + #Axis titles
theme_bw() +
theme(panel.border = element_rect(color="black", fill=NA, size=0.75),
panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~ variable, scales = "free")
ggsave(filename="../output/WGBS/LifeStage_Box.jpeg", plot=LifeStage_Box, dpi=300, width=12, height=12, units="in")
Gel_Box <- ggplot(data8, aes(x=Gel_Result, y=value)) +
geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
geom_point(pch = 21) +
xlab("Gel Result") + ylab("Value") + #Axis titles
theme_bw() +
theme(panel.border = element_rect(color="black", fill=NA, size=0.75),
panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~ variable, scales = "free")
ggsave(filename="../output/WGBS/Gel_Box.jpeg", plot=Gel_Box, dpi=300, width=12, height=12, units="in")
Extraction_Box <- ggplot(data8, aes(x=Extraction_Date, y=value)) +
geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
geom_point(pch = 21) +
xlab("Extraction Date") + ylab("Value") + #Axis titles
theme_bw() +
theme(panel.border = element_rect(color="black", fill=NA, size=0.75),
panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~ variable, scales = "free")
ggsave(filename="../output/WGBS/Extraction_Box.jpeg", plot=Extraction_Box, dpi=300, width=12, height=12, units="in")
PMS_Box <- ggplot(data8, aes(x=PMS_Date, y=value)) +
geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.5), alpha = 0.7) +
geom_point(pch = 21) +
xlab("PMS Library Prep Date") + ylab("Value") + #Axis titles
theme_bw() +
theme(panel.border = element_rect(color="black", fill=NA, size=0.75),
panel.grid.major = element_blank(), #Makes background theme white
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(~ variable, scales = "free")
ggsave(filename="../output/WGBS/PMS_Box.jpeg", plot=PMS_Box, dpi=300, width=12, height=12, units="in")
#Adult
data9A <- data8A %>%
select(Sample, History, variable, value) %>%
pivot_wider(names_from = variable, values_from = value) %>%
select(-Sample)
tableby_adult <- tableby(History ~., data = data9A)
summary(tableby_adult)
| Patch-Ambient-Patch (N=4) | Patch-Ambient-Rim (N=4) | Patch-Heated-Patch (N=4) | Patch-Heated-Rim (N=4) | Rim-Ambient-Patch (N=4) | Rim-Ambient-Rim (N=4) | Rim-Heated-Patch (N=4) | Rim-Heated-Rim (N=4) | Total (N=32) | p value | |
|---|---|---|---|---|---|---|---|---|---|---|
| percent_mCpG | 0.098 | |||||||||
| Mean (SD) | 7.950 (0.173) | 7.825 (0.499) | 8.150 (0.191) | 8.175 (0.585) | 7.425 (0.222) | 7.600 (0.476) | 7.500 (0.616) | 7.525 (0.340) | 7.769 (0.468) | |
| Range | 7.700 - 8.100 | 7.400 - 8.500 | 8.000 - 8.400 | 7.500 - 8.800 | 7.200 - 7.700 | 7.300 - 8.300 | 6.800 - 8.300 | 7.100 - 7.800 | 6.800 - 8.800 | |
| percent_mCHG | 0.942 | |||||||||
| Mean (SD) | 0.800 (0.356) | 0.825 (0.519) | 0.675 (0.206) | 0.625 (0.050) | 0.675 (0.222) | 0.600 (0.141) | 0.750 (0.173) | 0.775 (0.419) | 0.716 (0.275) | |
| Range | 0.500 - 1.300 | 0.500 - 1.600 | 0.500 - 0.900 | 0.600 - 0.700 | 0.500 - 1.000 | 0.500 - 0.800 | 0.600 - 0.900 | 0.500 - 1.400 | 0.500 - 1.600 | |
| percent_mCHH | 0.959 | |||||||||
| Mean (SD) | 1.450 (0.436) | 1.400 (0.678) | 1.200 (0.245) | 1.150 (0.173) | 1.325 (0.519) | 1.175 (0.171) | 1.325 (0.299) | 1.225 (0.544) | 1.281 (0.386) | |
| Range | 1.200 - 2.100 | 0.900 - 2.400 | 1.000 - 1.500 | 0.900 - 1.300 | 1.000 - 2.100 | 1.000 - 1.400 | 1.000 - 1.700 | 0.800 - 2.000 | 0.800 - 2.400 | |
| M_Cs | 0.961 | |||||||||
| Mean (SD) | 340.750 (57.838) | 320.075 (149.378) | 400.300 (77.135) | 312.650 (140.697) | 346.075 (48.523) | 375.125 (113.891) | 357.475 (114.043) | 378.950 (168.775) | 353.925 (106.250) | |
| Range | 260.700 - 391.400 | 119.400 - 473.000 | 333.300 - 509.700 | 123.000 - 451.300 | 284.100 - 390.300 | 282.600 - 540.600 | 222.700 - 485.700 | 139.400 - 533.800 | 119.400 - 540.600 | |
| percent_Dups | 0.300 | |||||||||
| Mean (SD) | 15.750 (4.948) | 14.250 (2.779) | 15.850 (3.328) | 11.050 (1.066) | 12.875 (1.733) | 15.475 (1.660) | 12.575 (0.950) | 16.150 (6.125) | 14.247 (3.432) | |
| Range | 11.200 - 21.100 | 11.800 - 16.900 | 11.600 - 18.600 | 9.900 - 12.400 | 11.500 - 15.400 | 13.800 - 17.000 | 11.500 - 13.600 | 10.600 - 24.900 | 9.900 - 24.900 | |
| percent_Aligned | 0.042 | |||||||||
| Mean (SD) | 34.675 (1.394) | 34.900 (2.255) | 37.625 (1.688) | 34.300 (2.357) | 37.500 (1.472) | 33.625 (1.962) | 37.100 (2.906) | 33.950 (2.293) | 35.459 (2.432) | |
| Range | 33.000 - 36.400 | 31.600 - 36.400 | 35.300 - 39.100 | 31.700 - 37.200 | 35.300 - 38.400 | 31.200 - 35.500 | 32.800 - 39.200 | 30.800 - 35.700 | 30.800 - 39.200 | |
| Ins_size | 0.599 | |||||||||
| Mean (SD) | 96.750 (8.884) | 93.500 (7.594) | 98.750 (4.573) | 103.750 (26.475) | 89.000 (8.165) | 103.000 (18.511) | 90.750 (10.844) | 105.500 (12.767) | 97.625 (13.585) | |
| Range | 91.000 - 110.000 | 86.000 - 103.000 | 93.000 - 104.000 | 85.000 - 143.000 | 77.000 - 95.000 | 88.000 - 128.000 | 77.000 - 103.000 | 91.000 - 122.000 | 77.000 - 143.000 | |
| Median_cov | 0.850 | |||||||||
| Mean (SD) | 2.250 (0.500) | 1.750 (1.258) | 2.750 (0.957) | 2.000 (1.414) | 1.750 (0.500) | 2.000 (0.816) | 2.000 (0.816) | 1.750 (1.258) | 2.031 (0.933) | |
| Range | 2.000 - 3.000 | 0.000 - 3.000 | 2.000 - 4.000 | 0.000 - 3.000 | 1.000 - 2.000 | 1.000 - 3.000 | 1.000 - 3.000 | 0.000 - 3.000 | 0.000 - 4.000 | |
| Mean_cov | 0.973 | |||||||||
| Mean (SD) | 4.025 (0.665) | 3.800 (1.683) | 4.700 (1.003) | 3.700 (1.817) | 4.200 (0.497) | 4.275 (1.021) | 4.400 (1.476) | 4.300 (1.851) | 4.175 (1.223) | |
| Range | 3.100 - 4.600 | 1.500 - 5.400 | 3.800 - 6.100 | 1.300 - 5.600 | 3.600 - 4.700 | 3.400 - 5.700 | 2.600 - 6.100 | 1.600 - 5.800 | 1.300 - 6.100 |
table_adult <- as.data.frame(summary(tableby_adult))
write.csv(table_adult, "../output/WGBS/Adult_WGBS_Summarystats.csv")
#Larvae
data9L <- data8L %>%
select(Sample, History, variable, value) %>%
pivot_wider(names_from = variable, values_from = value) %>%
select(-Sample)
tableby_larvae <- tableby(History ~., data = data9L)
summary(tableby_larvae)
| Patch-Ambient-Patch (N=4) | Patch-Heated-Patch (N=4) | Rim-Ambient-Patch (N=4) | Rim-Heated-Patch (N=3) | Total (N=15) | p value | |
|---|---|---|---|---|---|---|
| percent_mCpG | 0.066 | |||||
| Mean (SD) | 7.425 (0.675) | 8.400 (0.258) | 7.400 (0.408) | 7.600 (0.693) | 7.713 (0.637) | |
| Range | 6.800 - 8.300 | 8.100 - 8.700 | 6.800 - 7.700 | 7.200 - 8.400 | 6.800 - 8.700 | |
| percent_mCHG | 0.830 | |||||
| Mean (SD) | 0.775 (0.350) | 0.650 (0.173) | 0.675 (0.150) | 0.633 (0.153) | 0.687 (0.210) | |
| Range | 0.600 - 1.300 | 0.500 - 0.800 | 0.600 - 0.900 | 0.500 - 0.800 | 0.500 - 1.300 | |
| percent_mCHH | 0.117 | |||||
| Mean (SD) | 1.250 (0.379) | 0.850 (0.173) | 1.175 (0.150) | 0.967 (0.058) | 1.067 (0.266) | |
| Range | 1.000 - 1.800 | 0.700 - 1.000 | 1.000 - 1.300 | 0.900 - 1.000 | 0.700 - 1.800 | |
| M_Cs | 0.196 | |||||
| Mean (SD) | 325.975 (100.575) | 448.725 (115.826) | 396.900 (120.304) | 498.567 (27.567) | 412.140 (111.458) | |
| Range | 194.400 - 439.400 | 311.900 - 594.300 | 300.200 - 561.800 | 478.500 - 530.000 | 194.400 - 594.300 | |
| percent_Dups | 0.342 | |||||
| Mean (SD) | 13.200 (2.288) | 16.175 (5.121) | 12.275 (2.497) | 15.333 (0.321) | 14.173 (3.290) | |
| Range | 11.000 - 16.100 | 11.100 - 23.300 | 10.800 - 16.000 | 15.100 - 15.700 | 10.800 - 23.300 | |
| percent_Aligned | 0.511 | |||||
| Mean (SD) | 41.000 (2.094) | 40.450 (1.112) | 39.100 (1.988) | 40.200 (1.670) | 40.187 (1.731) | |
| Range | 39.500 - 44.100 | 38.900 - 41.500 | 36.300 - 41.000 | 38.700 - 42.000 | 36.300 - 44.100 | |
| Ins_size | 0.004 | |||||
| Mean (SD) | 80.250 (3.862) | 107.500 (6.403) | 88.750 (10.243) | 96.000 (10.536) | 92.933 (12.803) | |
| Range | 76.000 - 84.000 | 103.000 - 117.000 | 76.000 - 98.000 | 86.000 - 107.000 | 76.000 - 117.000 | |
| Median_cov | 0.028 | |||||
| Mean (SD) | 1.750 (0.500) | 3.000 (0.816) | 2.250 (0.500) | 3.000 (0.000) | 2.467 (0.743) | |
| Range | 1.000 - 2.000 | 2.000 - 4.000 | 2.000 - 3.000 | 3.000 - 3.000 | 1.000 - 4.000 | |
| Mean_cov | 0.331 | |||||
| Mean (SD) | 4.175 (1.276) | 5.050 (1.190) | 4.825 (1.302) | 5.900 (0.608) | 4.927 (1.200) | |
| Range | 2.500 - 5.600 | 3.600 - 6.500 | 3.800 - 6.600 | 5.500 - 6.600 | 2.500 - 6.600 |
table_Larvae <- as.data.frame(summary(tableby_larvae))
write.csv(table_Larvae, "../output/WGBS/Larvae_WGBS_Summarystats.csv")